import os
import time

from openai import OpenAI


# Connection settings for the OpenAI-compatible chat endpoint. Each value can
# be overridden by an environment variable of the same name; the literals are
# only fallbacks that keep the script runnable exactly as before.
# NOTE(review): the fallback API key is a credential committed to source --
# prefer supplying it through the environment and removing/rotating this value.
LLM_CHAT_API_KEY = os.environ.get(
    "LLM_CHAT_API_KEY", "3KnSx1QPYUdmziUzLC81yHnCLvCgqA2A"
)
QWEN_32B_CHAT_API_URL = os.environ.get(
    "QWEN_32B_CHAT_API_URL", "http://192.168.211.131:8901/v1"
)
QWEN_32B_CHAT_API_MODEL_NAME = os.environ.get(
    "QWEN_32B_CHAT_API_MODEL_NAME", "QWEN-32B"
)

client = OpenAI(api_key=LLM_CHAT_API_KEY, base_url=QWEN_32B_CHAT_API_URL)
model_name = QWEN_32B_CHAT_API_MODEL_NAME

# perf_counter() is monotonic, so the measured interval cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
chat_response = client.chat.completions.create(
    model=model_name,
    messages=[
        {
            "role": "user",
            # The prompt (in Chinese) asks whether the given text is abusive
            # or sensitive and requests a one-word answer: '是' (yes) or
            # '否' (no). The text being classified here is "你好".
            "content": "以下文本是否为辱骂性或敏感性问题？如果是，请回答 '是'，否则回答 '否'。\n文本:你好 \n回答:",
        },
    ],
    max_tokens=3840,
    # temperature=0 with a fixed seed -- presumably to make the output
    # reproducible between runs; confirm the backend honours `seed`.
    temperature=0,
    seed=42,
)
# Stop the clock right after the request returns so that only the round trip
# is measured, not the local post-processing below.
end_time = time.perf_counter()

result = chat_response.choices[0].message.content
print(result)
print(chat_response)

total_time = end_time - start_time
print(f"{total_time}s")

# `usage` is optional in the response model, so an OpenAI-compatible server
# may leave it unset; guard against that (and against a non-positive elapsed
# time) instead of crashing after the useful output has been printed.
usage = chat_response.usage
completion_tokens = usage.completion_tokens if usage is not None else None
if completion_tokens is None or total_time <= 0:
    average_tokens_per_second = None
    print("tokens/s unavailable (no usage data or zero elapsed time)")
else:
    average_tokens_per_second = completion_tokens / total_time
    print(f"{average_tokens_per_second:.2f} tokens/s")
